In [1]:
import altair as alt
import json
import numpy as np
import pandas as pd

alt.data_transformers.disable_max_rows()
Out[1]:
DataTransformerRegistry.enable('default')
In [2]:
df = pd.read_csv('../data/tirol_obituaries_deduped_weekly.csv', delimiter = ',')
In [3]:
df.head()
Out[3]:
district municipaly year week count
0 Kufstein Kirchbichl 2017 1 1
1 Kufstein Kirchbichl 2017 2 2
2 Kufstein Kirchbichl 2017 3 0
3 Kufstein Kirchbichl 2017 4 2
4 Kufstein Kirchbichl 2017 5 2
In [4]:
#http://www.statistik.at/wcm/idc/idcplg?IdcService=GET_NATIVE_FILE&RevisionSelectionMethod=LatestReleased&dDocName=122943
statistik_austria = pd.DataFrame({
    'week': [15,14,13,12,11,10,9,8,7,6,5,4,3,2,1],
    'death_count': [160,153,152,136,130,128,137,123,117,119,132,133,138,136,123]
})
statistik_austria.head()
Out[4]:
week death_count
0 15 160
1 14 153
2 13 152
3 12 136
4 11 130
In [5]:
base = alt.Chart(df, width=470, height=200)

x_domain=(1,52)

week_cut_off='17'

week_point = base.transform_filter(
    'datum.year==2020 & datum.week<'+week_cut_off
).mark_circle(color='red', opacity=0, size=150).encode(
    alt.X('week:Q', scale=alt.Scale(domain=x_domain,zero=False,nice=False), title='', axis=alt.Axis(grid=False)),
    alt.Y('sum(count)', title=''),
    tooltip=['year', 'week', 'sum(count)']
)

week_historic_area = base.transform_filter(
    'datum.year!=2020 && datum.year!=2016'
).transform_joinaggregate(
    week_count='sum(count)',
    groupby=['year', 'week']
).mark_area(color='lightgray', opacity=0.5, strokeJoin='round').encode(
    alt.X('week:Q', scale=alt.Scale(domain=x_domain,zero=False,nice=False), title='', axis=alt.Axis(grid=False)),
    alt.Y('max(week_count):Q', title=''),
    alt.Y2('min(week_count):Q', title='')
)

week_historic_mean = base.transform_filter(
    'datum.year!=2020 && datum.year!=2016'
).transform_joinaggregate(
    week_count='sum(count)',
    groupby=['year', 'week']
).mark_line(color='gray', opacity=0.25, strokeJoin='round').encode(
    alt.X('week:Q', scale=alt.Scale(domain=x_domain,zero=False,nice=False), title='', axis=alt.Axis(grid=False)),
    alt.Y('mean(week_count):Q', title='')
)

week_current_line = base.transform_filter(
    'datum.year==2020 & datum.week<'+week_cut_off
).mark_line(strokeJoin='round').encode(
    alt.X('week:Q', scale=alt.Scale(domain=x_domain,zero=False,nice=False), title='', axis=alt.Axis(grid=False)),
    alt.Y('sum(count):Q', title='', axis=alt.Axis(domainOpacity=0,ticks=False)),
    tooltip=['year:Q', 'week:Q', 'sum(count):Q']
)

week_statistik_austria_line = alt.Chart(statistik_austria).mark_line(clip=True,color='orange', strokeJoin='round').encode(
    alt.X('week:Q', scale=alt.Scale(domain=x_domain,zero=False,nice=False), title='', axis=alt.Axis(grid=False)),
    alt.Y('death_count:Q', title='', axis=alt.Axis(domainOpacity=0,ticks=False)),
    tooltip=['week:Q', 'death_count:Q']
)

week_statistik_austria_line_80 = alt.Chart(statistik_austria).transform_calculate(
    death_count_adjusted='datum.death_count*0.8'
).mark_line(clip=True,color='#ffbb78', strokeJoin='round').encode(
    alt.X('week:Q', scale=alt.Scale(domain=x_domain,zero=False,nice=False), title='', axis=alt.Axis(grid=False)),
    alt.Y('death_count_adjusted:Q', title='', axis=alt.Axis(domainOpacity=0,ticks=False)),
    tooltip=['week:Q', 'death_count_adjusted:Q']
)

chart_3_legendDomain = [
    'Statistik Austria 2020',
#     'Statistik Austria 2020 80%',
    'Todesanzeigen 2020',
    'Todesanzeigen 2017-2019 min/max',
    'Todesanzeigen 2017-2019 mean'
]
chart_3_legendColors = [
    'orange',
#     '#ffbb78',
    '#1f77b4',
    'lightgray',
    'gray'
]
chart_3_legendData = pd.DataFrame({
    'label': chart_3_legendDomain
})

legend3 = alt.Chart(chart_3_legendData).mark_square(size=150).encode(
    y=alt.Y(
        'label:N',
        axis=alt.Axis(domain=False, ticks=False, orient='right'),
        title=None
    ),
    color=alt.Color(
        'label',
        scale=alt.Scale(domain=chart_3_legendDomain,range=chart_3_legendColors),
        legend=None
    )
)

vega_week = ((
    week_historic_area +
    week_historic_mean +
    week_statistik_austria_line +
#     week_statistik_austria_line_80 +
    week_current_line +
    week_point
) | legend3).properties(
    title={
      "text": ["Todesanzeigen Tirol pro Woche im Jahresverlauf"],
    }
).configure_axis(
    grid=True,
    gridColor="#eee",
    domainColor="#ddd",
    tickColor="#ddd",
    labelColor="gray",
    labelBound=True,
).configure_view(
    strokeWidth=0
).configure_title(
    fontSize=14,
    fontWeight='bold',
    anchor='start',
    color='gray'
)

vega_week
Out[5]:
In [6]:
def district_chart(district='Kufstein'):
    d_df = df[df['district']==district]

    d_base = alt.Chart(d_df).properties(
        width=220,
        height=125
    )

    x_domain=(1,52)
    y_domain=(0,40)

    d_week_historic_area = d_base.transform_filter(
        'datum.year!=2020 && datum.year!=2016'
    ).mark_area(color='lightgray', opacity=0.5, strokeJoin='round').transform_joinaggregate(
        week_count='sum(count)',
        groupby=['year', 'week']
    ).encode(
        alt.X('week:Q', scale=alt.Scale(domain=x_domain,zero=False,nice=False), title='', axis=alt.Axis(grid=False)),
        alt.Y('max(week_count):Q', title=''),
        alt.Y2('min(week_count):Q', title='')
    )

    d_week_historic_mean = d_base.transform_filter(
        'datum.year!=2020 && datum.year!=2016'
    ).mark_line(color='gray', opacity=0.25, strokeJoin='round').transform_joinaggregate(
        week_count='sum(count)',
        groupby=['year', 'week']
    ).encode(
        alt.X('week:Q', scale=alt.Scale(domain=x_domain,zero=False,nice=False), title='', axis=alt.Axis(grid=False)),
        alt.Y('mean(week_count):Q', title='')
    )

    d_week_current_line = d_base.transform_filter(
        'datum.year==2020 & datum.week<'+week_cut_off
    ).mark_line(strokeJoin='round').encode(
        alt.X('week:Q', scale=alt.Scale(domain=x_domain,zero=False,nice=False), title='', axis=alt.Axis(grid=False)),
        alt.Y('sum(count):Q', title='', axis=alt.Axis(domainOpacity=0,ticks=False))
    )

    d_week_point = d_base.transform_filter(
        'datum.year==2020 & datum.week<'+week_cut_off
    ).mark_circle(color='gray', opacity=0, size=150).encode(
        alt.X('week:Q', scale=alt.Scale(domain=x_domain,zero=False,nice=False), title='', axis=alt.Axis(grid=False)),
        alt.Y('sum(count):Q', scale=alt.Scale(domain=y_domain,zero=True,nice=False), title=''),
        tooltip=['year:Q', 'week:Q', 'sum(count):Q']
    )

    return (
        d_week_historic_area +
        d_week_historic_mean +
        d_week_current_line +
        d_week_point
    ).properties(
        title={
          "text": [district],
        }
    )

d_chart_3_legendDomain = [
    'Todesanzeigen 2020',
    'Todesanzeigen 2017-2019 min/max',
    'Todesanzeigen 2017-2019 mean'
]
d_chart_3_legendColors = ['#1f77b4', 'lightgray', 'gray']
d_chart_3_legendData = pd.DataFrame({
    'label': d_chart_3_legendDomain
})


d_legend3 = alt.Chart(d_chart_3_legendData).mark_square(size=150).encode(
    y=alt.Y(
        'label:N',
        axis=alt.Axis(domain=False, ticks=False, orient='right'),
        title=None
    ),
    color=alt.Color(
        'label',
        scale=alt.Scale(domain=d_chart_3_legendDomain,range=d_chart_3_legendColors),
        legend=None
    )
)

vega_week_district = (
    (
         district_chart('Imst') | district_chart('Innsbruck-Stadt') | district_chart('Innsbruck-Land')
    ) & (
        district_chart('Kitzbühel') | district_chart('Kufstein') | district_chart('Landeck')
    ) & (
        district_chart('Lienz') | district_chart('Reutte') | district_chart('Schwaz')
    ) | d_legend3
).configure_axis(
        grid=True,
        gridColor="#eee",
        domainColor="#ddd",
        tickColor="#ddd",
        labelColor="gray",
        labelBound=True,
    ).configure_view(
        strokeWidth=0
    ).configure_title(
        fontSize=14,
        fontWeight='normal',
        anchor='start',
        color='gray'
    )

vega_week_district
Out[6]:
In [7]:
def municipaly_chart(district='Kufstein'):
    d_df = df[df['district']==district]

    d_base = alt.Chart(d_df).properties(
        width=150,
        height=60
    )

    x_domain=(1,52)
    y_domain=(0,20)

    d_week_historic_area = d_base.transform_filter(
        'datum.year!=2020 && datum.year!=2016'
    ).transform_joinaggregate(
        week_count='sum(count)',
        groupby=['week','year']
    ).transform_calculate(
        week_count_min='min(datum.week_count)',
        week_count_max='max(datum.week_count)'
    ).mark_area(color='lightgray', opacity=0.5, strokeJoin='round').encode(
        alt.X('week:Q', scale=alt.Scale(domain=x_domain,zero=False,nice=False), title='', axis=alt.Axis(grid=False)),
        alt.Y('max(week_count):Q', title=''),
        alt.Y2('min(week_count):Q', title='')
    )

    d_week_historic_mean = d_base.transform_filter(
        'datum.year!=2020 && datum.year!=2016'
    ).transform_joinaggregate(
        week_count='sum(count)',
        groupby=['week','year']
    ).mark_line(color='gray', opacity=0.25, strokeJoin='round').encode(
        alt.X('week:Q', scale=alt.Scale(domain=x_domain,zero=False,nice=False), title='', axis=alt.Axis(grid=False)),
        alt.Y('mean(week_count):Q', title='')
    )

    d_week_current_line = d_base.transform_filter(
        'datum.year==2020 & datum.week<'+week_cut_off
    ).transform_joinaggregate(
        week_count='sum(count)',
        groupby=['week','year']
    ).mark_line(strokeJoin='round').encode(
        alt.X('week:Q', scale=alt.Scale(domain=x_domain,zero=False,nice=False), title='', axis=alt.Axis(grid=False)),
        alt.Y('week_count:Q', title='', axis=alt.Axis(domainOpacity=0,ticks=False))
    )

    d_week_point = d_base.transform_filter(
        'datum.year==2020 & datum.week<'+week_cut_off
    ).mark_circle(color='gray', opacity=0, size=150).encode(
        alt.X('week:Q', scale=alt.Scale(domain=x_domain,zero=False,nice=False), title='', axis=alt.Axis(grid=False)),
        alt.Y('sum(count):Q', scale=alt.Scale(zero=True), title=''),
        tooltip=['year:Q', 'week:Q', 'sum(count):Q']
    )

    return (
        d_week_historic_area +
        d_week_historic_mean +
        d_week_current_line +
        d_week_point
    ).facet(
        facet=alt.Facet('municipaly:N', title=None, header=alt.Header(
            labelAnchor='start',
            labelOrient='bottom',
            labelPadding=0
        )),
        columns=5,
        padding=0,
        title={
          "text": ["Todesanzeigen im Bezirk "+district+" pro Gemeinde pro Woche im Jahresverlauf"], 
          "subtitle": [
    #               "Durchschnitt pro Monat in Hellblau",
              "Quelle: https://github.com/walterra/covid-19-tirol-ds"
          ],
          "color": "black",
          "subtitleColor": "gray"
        },
    ).configure_axis(
        grid=True,
        gridColor="#eee",
        domainColor="#ddd",
        tickColor="#ddd",
        labelColor="gray",
        labelBound=True,
    ).configure_view(
        strokeWidth=0
    ).configure_title(
        fontSize=14,
        fontWeight='bold',
        anchor='start',
        color='gray'
    )

d_chart_3_legendDomain = [
    'Todesanzeigen 2020',
    'Todesanzeigen 2017-2019 min/max',
    'Todesanzeigen 2017-2019 mean'
]
d_chart_3_legendColors = ['#1f77b4', 'lightgray', 'gray']
d_chart_3_legendData = pd.DataFrame({
    'label': d_chart_3_legendDomain
})


d_legend3 = alt.Chart(d_chart_3_legendData).mark_square(size=150).encode(
    y=alt.Y(
        'label:N',
        axis=alt.Axis(domain=False, ticks=False, orient='right'),
        title=None
    ),
    color=alt.Color(
        'label',
        scale=alt.Scale(domain=d_chart_3_legendDomain,range=d_chart_3_legendColors),
        legend=None
    )
)

vega_week_municipaly = municipaly_chart('Kufstein')

vega_week_municipaly
Out[7]:
In [8]:
# save chart_1 as formatted JSON
with open("../docs/data/vega_week.json", "w") as jsonFile:
    json.dump(json.loads(vega_week.to_json()), jsonFile, indent=4, sort_keys=True)
In [9]:
# save chart_1 as formatted JSON
with open("../docs/data/vega_week_district.json", "w") as jsonFile:
    json.dump(json.loads(vega_week_district.to_json()), jsonFile, indent=4, sort_keys=True)
In [10]:
districts = {
    "imst": "Imst",
    "innsbruck-land": "Innsbruck-Land",
    "innsbruck-stadt": "Innsbruck-Stadt",
    "kitzbuehel": "Kitzbühel",
    "kufstein": "Kufstein",
    "landeck": "Landeck",
    "reutte": "Reutte",
    "schwaz": "Schwaz",
    "lienz": "Lienz"
}

for d in districts:
    chart = municipaly_chart(districts[d])
    with open("../docs/data/vega_" + d + ".json", "w") as jsonFile:
        json.dump(json.loads(chart.to_json()), jsonFile, indent=4, sort_keys=True)